# -------------------------------------------------------------------
# Purpose: Creates Table B25
# Author:  Max Posch, 25/07/2025
# Usage:   Source this script to generate the table.
# -------------------------------------------------------------------
# Fail fast if the input or output directory is missing. Both path variables
# are expected to be defined by the calling environment before sourcing.
stopifnot(
  dir.exists(pdataconfanalysis),
  dir.exists(poutputappendix)
)


# Load the analysis data set from the .RData file into the current environment
load(file = file.path(pdataconfanalysis, "surnameCountyLevel19001940.RData"))


# Regressions
# Six weighted specifications, estimated in table-column order:
#   columns 1-3: patents outcome, adding regressors step by step
#   columns 4-6: breakthrough-patents outcome, same three regressor sets
# Every model uses the same fixed-effects part (after `|`) and the same
# weights (`wgt_name`).
o <- lapply(
  list(
    sum_patents_pc_1900_f_w ~
      log_no_of_namelast_mp_adjp_s +
      sum_n_namelast_mp_adjp_ws |
      gisjoin_1900^namelast_mp + namelast_mp^year + statefip^year +
      gisjoin_1900[year],
    sum_patents_pc_1900_f_w ~
      entropy_evenness_namelast_mp_adjp_ws +
      log_no_of_namelast_mp_adjp_s +
      sum_n_namelast_mp_adjp_ws |
      gisjoin_1900^namelast_mp + namelast_mp^year + statefip^year +
      gisjoin_1900[year],
    sum_patents_pc_1900_f_w ~
      entropy_namelast_mp_adjp_ws +
      entropy_evenness_namelast_mp_adjp_ws +
      log_no_of_namelast_mp_adjp_s +
      sum_n_namelast_mp_adjp_ws |
      gisjoin_1900^namelast_mp + namelast_mp^year + statefip^year +
      gisjoin_1900[year],
    sum_break_p80_rrfsim05_pc_1900_f_w ~
      log_no_of_namelast_mp_adjp_s +
      sum_n_namelast_mp_adjp_ws |
      gisjoin_1900^namelast_mp + namelast_mp^year + statefip^year +
      gisjoin_1900[year],
    sum_break_p80_rrfsim05_pc_1900_f_w ~
      entropy_evenness_namelast_mp_adjp_ws +
      log_no_of_namelast_mp_adjp_s +
      sum_n_namelast_mp_adjp_ws |
      gisjoin_1900^namelast_mp + namelast_mp^year + statefip^year +
      gisjoin_1900[year],
    sum_break_p80_rrfsim05_pc_1900_f_w ~
      entropy_namelast_mp_adjp_ws +
      entropy_evenness_namelast_mp_adjp_ws +
      log_no_of_namelast_mp_adjp_s +
      sum_n_namelast_mp_adjp_ws |
      gisjoin_1900^namelast_mp + namelast_mp^year + statefip^year +
      gisjoin_1900[year]
  ),
  # Explicit closure (rather than passing `feols` plus extra arguments to
  # lapply) so that the data argument is recorded by name in the model call.
  function(spec) {
    feols(
      spec,
      surnameCountyLevel19001940,
      weights = surnameCountyLevel19001940$wgt_name
    )
  }
)


# Create table
# Weighted mean and standard deviation of each outcome, displayed in the
# column headers of the table.

# Returns c(mean = ..., sd = ...) for `values` weighted by `weights`, using
# only the pairs where both the value and the weight are non-missing.
# The SD is the weighted root mean squared deviation around the UNROUNDED
# weighted mean: centring on a mean that was already rounded for display
# adds (mean - rounded mean)^2 to the variance and can shift the reported SD.
weighted_mean_sd <- function(values, weights) {
  keep <- !is.na(values) & !is.na(weights)
  values <- values[keep]
  weights <- weights[keep]
  center <- weighted.mean(values, weights)
  spread <- sqrt(weighted.mean((values - center)^2, weights))
  c(mean = center, sd = spread)
}

y1_stats <- weighted_mean_sd(
  surnameCountyLevel19001940[["sum_patents_pc_1900_f_w"]],
  surnameCountyLevel19001940[["wgt_name"]]
)
y2_stats <- weighted_mean_sd(
  surnameCountyLevel19001940[["sum_break_p80_rrfsim05_pc_1900_f_w"]],
  surnameCountyLevel19001940[["wgt_name"]]
)

# Round only at the very end, for display.
y1_mean <- round(y1_stats[["mean"]], 2)
y1_sd <- round(y1_stats[["sd"]], 2)
y2_mean <- round(y2_stats[["mean"]], 2)
y2_sd <- round(y2_stats[["sd"]], 2)

# LaTeX column headers (three stacked lines inside a \makecell).
y1 <- paste0("\\makecell{Patents \\\\ per 1,000 people \\\\ (mean = ", y1_mean, ", sd = ", y1_sd, ")}")
y2 <- paste0("\\makecell{Breakthrough patents \\\\ per 1,000 people \\\\ (mean = ", y2_mean, ", sd = ", y2_sd, ")}")

# Display labels used by etable() in place of the raw variable names.
setFixest_dict(
  c(
    # Regressors
    entropy_namelast_mp_adjp_ws = "Surname diversity",
    entropy_evenness_namelast_mp_adjp_ws = "Surname dispersion",
    log_no_of_namelast_mp_adjp_s = "Log distinct surnames",
    sum_n_namelast_mp_adjp_ws = "Population",
    # Outcomes (LaTeX headers built above, including mean and sd)
    sum_patents_pc_1900_f_w = y1,
    sum_break_p80_rrfsim05_pc_1900_f_w = y2,
    # Fixed-effect dimensions
    year = "Period",
    statefip = "State",
    namelast_mp = "Surname",
    gisjoin_1900 = "County"
  )
)

# Export the six models in `o` as a LaTeX table in the appendix output folder.
tablename <- file.path(poutputappendix, "tableB25.tex")
# etable() options used here:
#   cluster    - standard errors clustered on statefip
#   fitstat    - report only the number of observations
#   order      - patterns that fix the coefficient row order (they match the
#                display labels "Surname diversity", "Log distinct surnames",
#                "Surname dispersion")
#   digits*    - "r3" rounds coefficients and statistics to three decimals
#   replace    - overwrite an existing file at `tablename`
etable(o,
  cluster = ~statefip,
  fitstat = ~n,
  order = c("diversity", "distinct", "dispersion"),
  digits = "r3", digits.stats = "r3",
  file = tablename, replace = TRUE,
  style.tex = style.tex("aer"), tex = TRUE
)
# Post-process the .tex file in place. The four helpers below are defined
# elsewhere in the project; the descriptions are inferred from their names
# and arguments - TODO confirm against their definitions. The steps modify
# the same file one after another, so their order must be kept.
# Presumably relabels the varying-slope row as county-specific linear trends.
edit_table_content_fixed(tablename, "Period $\\times $ County", "County-specific linear trends")
# Presumably inserts partial rules under the two outcome headers
# (columns 2-4 and 5-7), located via the "mean =" header line.
add_table_row(tablename, "mean =", "\\cmidrule(lr){2-4}  \\cmidrule(lr){5-7}")
# Presumably moves the "Observations" row next to the bottom rule.
move_table_row(tablename, "Observations", "bottomrule")
# Presumably drops the "County fixed effects" row.
remove_table_row(tablename, "County fixed effects")
# Drops the 18th line of the file as it stands after the edits above.
# NOTE(review): hard-coded line index - it silently removes a different line
# (or none) if the table layout changes; verify after changing the models,
# the dictionary, or any of the post-processing steps.
writeLines(readLines(tablename)[-18], tablename)

# Report where the table was written.
cat("Table B25 saved to:", tablename, "\n")